# Monte Carlo Experiment III
rm(list=ls())
library(Matching)
library(MASS)
library(ebal)

## Setup
sims <- 20
n    <- 5000

# X1-X3 3 MVN var 2, 1, 1, covars 1, -1, -.5
vars   <- c(2,1,1)
covars <- c(1,-1,-.5)
mu     <- c(0,0,0)

Sigma <- diag(vars)
Sigma[2,1] <- Sigma[1,2] <- covars[1]
Sigma[3,1] <- Sigma[1,3] <- covars[2]
Sigma[3,2] <- Sigma[2,3] <- covars[3]

# pick target sample size
#nsim <- 1500
nsim <- 600
#nsim  <- 300

# pick design
#Design <- 1    # design 1 (strong separation)
Design <- 2   # design 2 (weak separation)
#Design <- 3   # design 3 (middle case)

# ratio: treated to controls
rr <- c(1,2,5)
big.est.store <- big.para.store <- list()

## MC Simulation Starts here
for(mm in 1:length(rr)){  # loop over the three treated to controls ratios
  r <- rr[mm]
 cat("Starting MC Simulation with C to T ratio of",r,"\n")
# target sample size treated
tr <- nsim/(1+r); tr
# target sample size controls
co <- nsim - tr; co
# storage for different estimators and parameters
para  <- c("FracCtoT","N","tr","co","corY.PS","corW.PS","corPS.PS1","corPS.PS2","corPS.PS3")
est    <- c("RAW","MD","GM","PS1","PS2","PS3","PSMD1","PSMD2","PSMD3","PSW1","PSW2","PSW3","EB")
est.st    <- matrix(NA,sims,length(est))
para.st  <- matrix(NA,sims,length(para))
colnames(est.st)   <- est
colnames(para.st) <- para

# expand storage for three different outcomes
est.store <- para.store <- list()
for(k in 1:3){
est.store[[k]]   <- est.st
para.store[[k]]  <- para.st
}

for(i in 1:sims){ # start MC for a single r

# draw Xs
X13 <- mvrnorm(n,mu=mu,Sigma=Sigma, empirical = F)
X1 <- X13[,1]
X2 <- X13[,2]
X3 <- X13[,3]
X4 <- runif(n,-3,3)
X5 <- rchisq(n, df=1)
X6 <- rbinom(n,size=1,prob=.5)

### simulate treatment indicator based on design
if (Design == 1 ){
## design 1 (strong separation)
e <- rnorm(n,mean=0,sd=sqrt(30))
}
if (Design == 2 ) {
## design 2 (weak separation)
e <- rnorm(n,mean=0,sd=sqrt(100))
}
if (Design == 3 ) { 
## design 3 (middle case)
# rescale to mean .5 and var 67.6
e <- rchisq(n, df = 5, ncp = 0)
findconst <- function(cc,e){abs(var(e*cc)-67.6)}
out <- optimize(findconst,lower=1,upper=67,e=e)
e <- e*out$minimum
e <- e-(mean(e)-.5)
}

# simulate treatment indicator
xb <- X1 + 2*X2 - 2*X3  - X4 - .5*X5 + X6 +  e
W  <- as.numeric(xb>0)

# sample to target sizes
sim.dat <- data.frame(W,X1,X2,X3,X4,X5,X6)
tr.keep <- sample(which(sim.dat$W==1),tr,replace=F)
co.keep <- sample(which(sim.dat$W==0),co,replace=F)
sim.dat <- sim.dat[c(tr.keep,co.keep),]
W <- sim.dat[,"W"]
X <- as.matrix(sim.dat[,names(sim.dat)[-1]])
X1 <- X[,"X1"]; X2 <- X[,"X2"]; X3 <- X[,"X3"]; X4 <- X[,"X4"]; X5 <- X[,"X5"];X6 <- X[,"X6"]

# true linear PS
PS.out  <- glm(W~X,family=binomial(link = "probit"))
PS.true <- PS.out$fitted

# ps specifciation formula
psxlist <- pslist <- list()
# ps 1
formula.PS1 <- formula(W~X1+X2+X3+X4+X5+X6)
psxlist[[1]] <- model.matrix(formula.PS1)[,-1]
# ps 2
formula.PS2  <- formula(W~I(X1^2)+I(X2^2)+X3+I(X4^2)+I(X5^2)+X6)
psxlist[[2]] <- model.matrix(formula.PS2)[,-1]
# ps 3
formula.PS3  <- formula(W~I(X1*X3)+I(X2^2)+X4+X5+X6)
psxlist[[3]] <- model.matrix(formula.PS3)[,-1]
# PS estimation
for(p in 1:3){
pslist[[p]] <- glm(W~psxlist[[p]],family=binomial(link = "probit"))
}

# matching specification formula
formula.Match <- formula(W~X1+X2+X3+X4+X5+X6)
X.Match <- model.matrix(formula.Match)[,-1]

##
# outcome: treatment effect is zero
u  <- rnorm(nrow(X))
# ouctome 1 (linear)
Y1 <- X1 + X2 + X3 - X4 + X5 + X6 + u
# ouctome 2 (medium nonlinear)
Y2 <- X1 + X2 + 0.2*X3*X4 - sqrt(X5) + u
# outcome 3 (strongly nonlinear)
Y3 <- (X1 + X2 + X5)^2 + u
# combine 
Y.mat <- cbind(Y1,Y2,Y3)

###
## implement estimators

# raw
for(k in 1:3){
Y <- Y.mat[,k]
est.store[[k]][i,"RAW"] <- mean(Y[W==1]) - mean(Y[W==0])
}

# EB
out.eb <- try(ebalance(
              Treatment=W,
              X=X.Match,
              print.level = -1
              ),silent=FALSE)
if(class(out.eb)=="try-error"){
 cat("EB did not converge in sims",i,"\n")
 } else {
  d <- out.eb$w
  for(k in 1:3){
    Y <- Y.mat[,k] 
   est.store[[k]][i,"EB"]  <- mean(Y[W==1]) - (sum(Y[W==0]*d)/sum(d))
 }
}

# MD Matching
out <- Match(Tr=W,X=X.Match,estimand="ATT",M=1,BiasAdjust = FALSE)
   for(k in 1:3){
    Y <- Y.mat[,k]
    est.store[[k]][i,"MD"] <- (sum(Y[out$index.treated]*out$weights)/sum(out$weights)) - (sum(Y[out$index.control]*out$weights)/sum(out$weights))
   }

## GenMatching
suppressWarnings(gout <- GenMatch(Tr=W, X=X.Match, BalanceMatrix=X.Match, estimand="ATT", M=1, weights=NULL,
                         print.level=0))
out  <- Match(Tr=W,X=X.Match,Weight.matrix=gout,estimand="ATT",M=1,BiasAdjust=FALSE)
  for(k in 1:3){
  Y <- Y.mat[,k]
   est.store[[k]][i,"GM"] <- (sum(Y[out$index.treated]*out$weights)/sum(out$weights)) - (sum(Y[out$index.control]*out$weights)/sum(out$weights))
}

# PS estimation
psname   <- c("PS1","PS2","PS3")
psmdname <- c("PSMD1","PSMD2","PSMD3")
wpsname  <- c("PSW1","PSW2","PSW3")

# start PS techniques 3 PS scores
for(p in 1:3){
 if(sum(pslist[[p]]$fitted<(0+sqrt(.Machine$double.eps)))> 0 || sum(pslist[[p]]$fitted>(1-sqrt(.Machine$double.eps)))>0){ # error reporting if perfect sep
  cat("Perfect Seperation in glm occured in sim",i,"\n")
 } else {
# ps matching
PS <- pslist[[p]]$linear
out <- Match(Tr=W,X=PS,estimand="ATT",M=1,BiasAdjust = FALSE)
   for(k in 1:3){
    Y <- Y.mat[,k]
    est.store[[k]][i,psname[p]] <- (sum(Y[out$index.treated]*out$weights)/sum(out$weights)) - (sum(Y[out$index.control]*out$weights)/sum(out$weights))
   }

# MDPS combined
# othorgonalize
L <- cbind(1,PS)
Xortho <- cbind(PS,psxlist[[p]]-(L%*%(solve(t(L)%*%L)%*%t(L)%*%psxlist[[p]])))
out <- Match(Y=Y,Tr=W,X=Xortho,estimand="ATT",M=1,BiasAdjust = FALSE)
  for(k in 1:3){
   Y <- Y.mat[,k]
   est.store[[k]][i,psmdname[p]] <- (sum(Y[out$index.treated]*out$weights)/sum(out$weights)) - (sum(Y[out$index.control]*out$weights)/sum(out$weights))
  }
  
# weighting on PS
PS.pr <- pslist[[p]]$fitted
d <- PS.pr/(1-PS.pr)
  for(k in 1:3){
   Y <- Y.mat[,k]
   est.store[[k]][i,wpsname[p]] <- mean(Y[W==1])- (sum(Y[W==0]*d[W==0])/sum(d[W==0]))
  }
}
}

for(k in 1:3){
Y <- Y.mat[,k]
# store housekeeping vars
para.store[[k]][i,"FracCtoT"] <- sum(W==0)/sum(W==1)
para.store[[k]][i,"corY.PS"] <- cor(Y,PS.true)
para.store[[k]][i,"corW.PS"] <- cor(W,PS.true)
para.store[[k]][i,"corPS.PS1"] <- cor(pslist[[1]]$fitted,PS.true)
para.store[[k]][i,"corPS.PS2"] <- cor(pslist[[2]]$fitted,PS.true)
para.store[[k]][i,"corPS.PS3"] <- cor(pslist[[3]]$fitted,PS.true)
para.store[[k]][i,"N"]  <- nrow(sim.dat)
para.store[[k]][i,"tr"] <- sum(W==1)
para.store[[k]][i,"co"] <- sum(W==0)
}

# output
if((i%%100)==0){
cat("MC Sims to go",sims-i,"\n")
}

} # end MC for fixed r

# store results for fixed r and move to next
names(est.store) <- c("Y1","Y2","Y3")
names(para.store) <- c("Y1","Y2","Y3")
big.est.store[[mm]]  <- est.store
big.para.store[[mm]] <- para.store

} # end big MC loop over r


## summarize parms settings
store.para <- matrix(NA,3*3,length(para))
colnames(store.para) <- para
rownames(store.para) <- paste(rep("Ratio CtoT",3),rep(c(1,3,5),each=3),rep(c("Y1","Y2","Y3")))
p <- 1
for(i in 1:3){
 for(k in 1:3){
store.para[p,] <- apply(big.para.store[[i]][[k]],2,mean)
 p <- p + 1
 }
}
store.para

# summarize mse
store.mse <- matrix(NA,3*3,length(est))
colnames(store.mse) <- est
rownames(store.mse) <- paste(rep("Ratio CtoT",3),rep(c(1,3,5),each=3),rep(c("Y1","Y2","Y3")))
p <- 1
for(i in 1:3){
 for(k in 1:3){
store.mse[p,] <- apply((big.est.store[[i]][[k]]-0)^2,2,mean,na.rm=T)
 p <- p + 1
 }
}
round(store.mse*100,0)

# summarize bias
store.bias <- matrix(NA,3*3,length(est))
colnames(store.bias) <- est
rownames(store.bias) <- paste(rep("Ratio CtoT",3),rep(c(1,3,5),each=3),rep(c("Y1","Y2","Y3")))
p <- 1
for(i in 1:3){
 for(k in 1:3){
store.bias[p,] <- apply(big.est.store[[i]][[k]]-0,2,mean,na.rm=T)
 p <- p + 1
 }
}
round(store.bias*100,0)